1 Introduction

The purpose of this notebook is to perform a patient-specific differential expression analysis (DEA) between the RT and CLL clusters. We will focus in the quiescent RT populations, since otherwise the proliferation signature will dominate the analysis.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(harmony)
library(UpSetR)
library(ggrepel)
library(ggforce)
library(tidyverse)

2.2 Define parameters

# Paths
path_to_12 <- here::here("results/R_objects/6.seurat_annotated_12.rds")
path_to_19 <- here::here("results/R_objects/6.seurat_annotated_19.rds")
path_to_3299 <- here::here("results/R_objects/6.seurat_annotated_3299.rds")
path_to_365 <- here::here("results/R_objects/6.seurat_annotated_365.rds")
path_to_63 <- here::here("results/R_objects/patient_63/3.seurat_annotated.rds")
path_to_save_rds <- here::here("6-differential_expression_analysis/tmp/patient_specific_differential_expression_analysis_rt_vs_cll.rds")
path_to_save_xlsx <- here::here("results/tables/DEA/patient_specific_differential_expression_analysis_rt_vs_cll.xlsx")


# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "mediumorchid2", "coral2", "blueviolet",
                   "indianred4", "deepskyblue1", "dimgray", "deeppink1",
                   "green", "lightgray", "hotpink1")


# Source functions
source(here::here("bin/utils.R"))


# Thresholds
alpha <- 0.05
min_avg_log2FC <- 0.25
selected_avg_log2FC_l <- c(
  "12" = 1.25,
  "19" = 0.75,
  "3299" = 0.75,
  "365" = 1.5,
  "63" = 1.25  
)
selected_avg_log2FC <- 1.25
selected_pct_cells <- 10
selected_significance_alpha <- alpha

2.3 Load data

paths_to_load <- c(
  "12" = path_to_12,
  "19" = path_to_19,
  "3299" = path_to_3299,
  "365" = path_to_365,
  "63" = path_to_63
)
seurat_list <- purrr::map(paths_to_load, readRDS)
seurat_list
## $`12`
## An object of class Seurat 
## 23326 features across 5785 samples within 1 assay 
## Active assay: RNA (23326 features, 2000 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap
## 
## $`19`
## An object of class Seurat 
## 23326 features across 7284 samples within 1 assay 
## Active assay: RNA (23326 features, 2000 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap
## 
## $`3299`
## An object of class Seurat 
## 23326 features across 6063 samples within 1 assay 
## Active assay: RNA (23326 features, 2000 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap
## 
## $`365`
## An object of class Seurat 
## 23326 features across 4685 samples within 1 assay 
## Active assay: RNA (23326 features, 2000 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap
## 
## $`63`
## An object of class Seurat 
## 13680 features across 983 samples within 1 assay 
## Active assay: RNA (13680 features, 2000 variable features)
##  2 dimensional reductions calculated: pca, umap
purrr::map(seurat_list, DimPlot, cols = color_palette)
## $`12`

## 
## $`19`

## 
## $`3299`

## 
## $`365`

## 
## $`63`

3 Differential expression analysis

comparisons <- list(
  "12" = list(
    richter = c("CCND2lo RT-like", "CCND2hi RT-like"),
    cll = c("CXCR4hiCD27lo", "CXCR4loCD27hi", "MIR155HGhi", "MZB1hiIGHMhiXBP1hi")
  ),
  "19" = list(
    richter = c("MIR155HGhi RT-like", "TP53INP1hi RT-like"),
    cll = c("CXCR4hiCD27lo", "CXCR4loCD27hi", "MIR155HGhi CLL-like")
  ),
  "3299" = list(
    richter = "RT-like",
    cll = c("CXCR4hiCD27lo", "CXCR4loCD27hi", "CD83loMIR155HGhi", "CD83hiMIR155HGhi")
  ),
  "365" = list(
    richter = c("RT-like quiescent", "CXCR4loCD27hi", "MIR155HGhi"),
    cll = c("CLL-like")
  ),
  "63" = list(
    richter = "RT-like quiescent",
    cll = "CLL-like"
  )
)
dea <- purrr::map(names(comparisons), function(x) {
  print(x)
  seurat_obj <- seurat_list[[x]]
  Idents(seurat_obj) <- "annotation_final"
  df <- FindMarkers(
    seurat_obj,
    ident.1 = comparisons[[x]][["richter"]],
    ident.2 = comparisons[[x]][["cll"]],
    only.pos = FALSE,
    logfc.threshold = 0
  )
  df <- df %>%
    rownames_to_column("gene") %>%
    arrange(desc(avg_log2FC))
  df
}) 
## [1] "12"
## [1] "19"
## [1] "3299"
## [1] "365"
## [1] "63"
names(dea) <- names(comparisons)
dea <- purrr::map(names(dea), function(x) {
  df <- dea[[x]]
  df$direction <- case_when(
    df$p_val_adj < alpha & df$avg_log2FC > min_avg_log2FC ~ "up",
    df$p_val_adj < alpha & df$avg_log2FC < 0 & abs(df$avg_log2FC) > min_avg_log2FC ~ "down"
  )
  df$direction[is.na(df$direction)] <- "not sig."
  pct_cells <- rowMeans(seurat_list[[x]][["RNA"]]@counts > 0) * 100
  df$pct_cells_expressing <- pct_cells[df$gene]
  df
})
names(dea) <- names(comparisons)
upregulated_richter <- purrr::map(dea, function(df) df$gene[df$direction == "up"])
downregulated_richter <- purrr::map(dea, function(df) df$gene[df$direction == "down"])
print("DEA patient 12")
## [1] "DEA patient 12"
DT::datatable(dea$`12`)
print("DEA patient 19")
## [1] "DEA patient 19"
DT::datatable(dea$`19`)
print("DEA patient 3299")
## [1] "DEA patient 3299"
DT::datatable(dea$`3299`)
print("DEA patient 365")
## [1] "DEA patient 365"
DT::datatable(dea$`365`)
print("DEA patient 63")
## [1] "DEA patient 63"
DT::datatable(dea$`63`)

4 Upset plots

upset_upregulated <- upset(fromList(upregulated_richter), order.by = "freq")
upset_downregulated <- upset(fromList(downregulated_richter), order.by = "freq")
upset_upregulated

upset_downregulated

5 MA plots

ma_plots <- purrr::map(names(dea), function(x) {
  df <- dea[[x]]
  selected_avg_log2FC <- selected_avg_log2FC_l[x]
  p <- ma_plot(
    df,
    selected_avg_log2FC = selected_avg_log2FC,
    selected_pct_cells = selected_pct_cells,
    selected_significance_alpha = selected_significance_alpha
  ) +
    ggtitle(x) +
    theme(plot.title = element_text(hjust = 0.5))
  p
})
names(ma_plots) <- names(dea)
ma_plots
## $`12`

## 
## $`19`

## 
## $`3299`

## 
## $`365`

## 
## $`63`

6 Save

saveRDS(dea, path_to_save_rds)
openxlsx::write.xlsx(dea, path_to_save_xlsx)

7 Session Information

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1        stringr_1.4.0        dplyr_1.0.6          purrr_0.3.4          readr_1.4.0          tidyr_1.1.3          tibble_3.1.2         tidyverse_1.3.1      ggforce_0.3.3        ggrepel_0.9.1        ggplot2_3.3.3        UpSetR_1.4.0         harmony_1.0          Rcpp_1.0.6           SeuratWrappers_0.3.0 SeuratObject_4.0.2   Seurat_4.0.3         BiocStyle_2.18.1    
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.2.1       plyr_1.8.6            igraph_1.2.6          lazyeval_0.2.2        splines_4.0.4         crosstalk_1.1.1       listenv_0.8.0         scattermore_0.7       digest_0.6.27         htmltools_0.5.1.1     fansi_0.5.0           magrittr_2.0.1        tensor_1.5            cluster_2.1.1         ROCR_1.0-11           openxlsx_4.2.3        limma_3.46.0          remotes_2.4.0         globals_0.14.0        modelr_0.1.8          matrixStats_0.59.0    spatstat.sparse_2.0-0 colorspace_2.0-1      rvest_1.0.0           haven_2.4.1           xfun_0.23             crayon_1.4.1          jsonlite_1.7.2        spatstat.data_2.1-0   survival_3.2-10       zoo_1.8-9             glue_1.4.2            polyclip_1.10-0       gtable_0.3.0          leiden_0.3.8          future.apply_1.7.0    abind_1.4-5           scales_1.1.1          DBI_1.1.1             miniUI_0.1.1.1        viridisLite_0.4.0     xtable_1.8-4          reticulate_1.20       spatstat.core_2.1-2   rsvd_1.0.5            DT_0.18               htmlwidgets_1.5.3     httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2             pkgconfig_2.0.3       farver_2.1.0         
##  [55] sass_0.4.0            uwot_0.1.10           dbplyr_2.1.1          deldir_0.2-10         here_1.0.1            utf8_1.2.1            labeling_0.4.2        tidyselect_1.1.1      rlang_0.4.11          reshape2_1.4.4        later_1.2.0           munsell_0.5.0         cellranger_1.1.0      tools_4.0.4           cli_2.5.0             generics_0.1.0        broom_0.7.7           ggridges_0.5.3        evaluate_0.14         fastmap_1.1.0         yaml_2.2.1            goftest_1.2-2         knitr_1.33            fs_1.5.0              fitdistrplus_1.1-5    zip_2.2.0             RANN_2.6.1            pbapply_1.4-3         future_1.21.0         nlme_3.1-152          mime_0.10             xml2_1.3.2            rstudioapi_0.13       compiler_4.0.4        plotly_4.9.4          png_0.1-7             spatstat.utils_2.2-0  reprex_2.0.0          tweenr_1.0.2          bslib_0.2.5.1         stringi_1.6.2         highr_0.9             lattice_0.20-41       Matrix_1.3-4          vctrs_0.3.8           pillar_1.6.1          lifecycle_1.0.0       BiocManager_1.30.15   spatstat.geom_2.1-0   lmtest_0.9-38         jquerylib_0.1.4       RcppAnnoy_0.0.18      data.table_1.14.0     cowplot_1.1.1        
## [109] irlba_2.3.3           httpuv_1.6.1          patchwork_1.1.1       R6_2.5.0              bookdown_0.22         promises_1.2.0.1      KernSmooth_2.23-18    gridExtra_2.3         parallelly_1.26.0     codetools_0.2-18      MASS_7.3-53.1         assertthat_0.2.1      rprojroot_2.0.2       withr_2.4.2           sctransform_0.3.2     mgcv_1.8-36           parallel_4.0.4        hms_1.1.0             grid_4.0.4            rpart_4.1-15          rmarkdown_2.8         Rtsne_0.15            shiny_1.6.0           lubridate_1.7.10